# -*- coding: utf-8 -*-
import scrapy
import re
import json
from 爬虫.我主良缘.Mate_selection_Crawl.Mate_selection_Crawl.items import MaleCrawlItem
import requests
import os


class LuckySpider(scrapy.Spider):
    """Spider that collects male user profiles from www.7799520.com.

    The crawl walks the paged JSON search API, follows every listed user id
    to the HTML profile page, stores the profile picture on local disk and
    yields one ``MaleCrawlItem`` per profile.
    """

    name = 'male'
    allowed_domains = ['www.7799520.com']

    # Search API for male users; ``{}`` is the page number (the crawl starts at 1).
    _LIST_URL = "http://www.7799520.com/api/user/pc/list/search?gender=1&marry=1&page={}"
    # HTML profile page; ``{}`` is the ``userid`` taken from the search API.
    _PROFILE_URL = "http://www.7799520.com/user/{}.html"
    # Extracts the page number from a list URL.
    _PAGE_PATTERN = re.compile(r'[?&]page=(\d+)')
    # Local file a profile picture is saved to; ``{}`` is the (sanitised) nickname.
    _PICTURE_PATH = r'D:\学习文件\python高阶编程\爬虫\我主良缘\male_picture\{}.jpg'
    # Characters that are path separators or not allowed in Windows file names.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    # (item key, XPath) pairs for fields holding a single text value.
    # Keys must match the field names declared on MaleCrawlItem exactly.
    _TEXT_FIELDS = (
        # Age
        ("age", '//span[@class="age s1"]/text()'),
        # Sex
        ("sex", '//p[@class="f18 c3e p2"]/span[2]/text()'),
        # Marital status
        ("marrage", '//span[@class="marrystatus"]/text()'),
        # Height
        ("heigh", '//span[@class="height"]/text()'),
        # Education
        ("education", '//span[@class="education"]/text()'),
        # Current residence
        ("local", '//ul[@class="clearfix user-info"]/li[1]/span/text()'),
        # Native place
        ("native", '//ul[@class="clearfix user-info"]/li[2]/span/text()'),
        # Constellation (star sign)
        ("constellation", '//ul[@class="clearfix user-info"]/li[3]/span/text()'),
        # Chinese zodiac
        ("zodiac", '//ul[@class="clearfix user-info"]/li[4]/span/text()'),
        # Blood type
        ("blood_type", '//ul[@class="clearfix user-info"]/li[6]/span/text()'),
        # Occupation
        ("job", '//ul[@class="clearfix user-info"]/li[7]/span/text()'),
        # Income
        ("income", '//ul[@class="clearfix user-info"]/li[8]/span/text()'),
        # Personal monologue
        ("monologue", '//div[@class="body no-border"]/p/text()'),
    )

    def start_requests(self):
        """Start the crawl at page 1 of the male user search API."""
        yield scrapy.Request(
            url=self._LIST_URL.format(1),
            callback=self.get_male_data,
            dont_filter=True,
        )

    def get_male_data(self, response):
        """Handle one page of the search API.

        Yields a request for every listed user's profile page, followed by a
        request for the next list page. Pagination stops as soon as a page
        contains no users or cannot be parsed, so the crawl terminates.

        :param response: response of a search API request (JSON body).
        """
        # Use the decoded text: str() of a bytes body yields "b'...'" on
        # Python 3, which is never valid JSON.
        try:
            payload = json.loads(response.text)
        except ValueError:
            self.logger.error("Invalid JSON from %s; stopping pagination", response.url)
            return

        data = payload.get('data') if isinstance(payload, dict) else None
        users = data.get('list') if isinstance(data, dict) else None
        if not users:
            # An empty (or missing) list marks the end of the result set.
            self.logger.info("No users listed at %s; stopping pagination", response.url)
            return

        for user in users:
            userid = user.get('userid') if isinstance(user, dict) else None
            if userid is None:
                continue
            yield scrapy.Request(
                url=self._PROFILE_URL.format(userid),
                callback=self.parse_male_Page,
                dont_filter=True,
            )

        page_match = self._PAGE_PATTERN.search(response.url)
        if page_match is None:
            # E.g. the request was redirected to a URL without a page number.
            self.logger.warning("No page number in %s; stopping pagination", response.url)
            return
        yield scrapy.Request(
            url=self._LIST_URL.format(int(page_match.group(1)) + 1),
            callback=self.get_male_data,
            dont_filter=True,
        )

    def parse_male_Page(self, response):
        """Extract one user profile from its HTML page.

        A page without a nickname is skipped. Any other single-valued field
        that is absent from the page is stored as ``None`` instead of
        discarding the whole profile.

        :param response: response of a user profile page request.
        """
        # Nickname
        id_name = response.xpath('//span[@class="nick c3e"]/text()').extract_first()
        if id_name is None:
            self.logger.warning("No nickname found on %s; skipping page", response.url)
            return

        item = MaleCrawlItem()
        item["id_name"] = id_name
        # Local path of the saved picture, or None when it could not be saved.
        item["picture_url"] = self._save_picture(response, id_name)

        for field, xpath in self._TEXT_FIELDS:
            item[field] = response.xpath(xpath).extract_first()

        # Partner requirements
        item["select"] = ' '.join(
            response.xpath('//ul[@class="cm-g info-list"]/li/text()').extract()
        ).replace('\n', '')
        # Hobbies and interests
        item["instrest"] = ' '.join(
            response.xpath('//dd[@class="content"]/span/text()').extract()
        )
        yield item

    def _save_picture(self, response, id_name):
        """Save the profile picture to disk unless it is already there.

        :param response: response of a user profile page request.
        :param id_name: nickname of the user; used to build the file name.
        :return: local path of the picture file, or ``None`` when the page has
            no picture or the download/write failed.
        """
        img_url = response.xpath('//li[@class=""]/img/@src').extract_first()
        if not img_url:
            return None

        # The nickname comes from the remote page: neutralise path separators
        # and characters that are invalid in file names before using it.
        safe_name = self._UNSAFE_FILENAME_CHARS.sub('_', id_name)
        picture_path = self._PICTURE_PATH.format(safe_name)
        if os.path.exists(picture_path):
            # Already downloaded; do not hit the network again.
            return picture_path

        # NOTE: requests.get blocks Scrapy's event loop while it runs;
        # Scrapy's ImagesPipeline would be the non-blocking alternative.
        try:
            reply = requests.get(response.urljoin(img_url), timeout=10)
            reply.raise_for_status()
            with open(picture_path, 'wb') as picture_file:
                picture_file.write(reply.content)
        except (requests.RequestException, OSError) as exc:
            self.logger.warning("Could not save picture %s: %s", img_url, exc)
            return None
        return picture_path
